// hhfullalignment.C

#include "hhhalfalignment.h"

/////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////
// Methods of class HalfAlignment
/////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////

/////////////////////////////////////////////////////////////////////////////////////
// Constructor
HalfAlignment::HalfAlignment(int maxseqdis) {
  n = 0;
  sname = seq = NULL;
  nss_dssp = nss_pred = nss_conf = nsa_dssp = ncons = -1;
  h = new int[maxseqdis];   //h[k] = next position of sequence k to be written
  s = new char*[maxseqdis];  //s[k][h] = character in column h, sequence k of output alignment
  l = new int*[maxseqdis];  //counts non-gap residues: l[k][i] = index of last residue AT OR BEFORE match state i in seq k
  m = new int*[maxseqdis];  //counts positions:        m[k][i] = position of match state i in string seq[k]
  if (!h || !s || !l || !m)
    MemoryError("space for formatting HMM-HMM alignment", __FILE__, __LINE__,
                __func__);
}

/////////////////////////////////////////////////////////////////////////////////////
// Destructor
HalfAlignment::~HalfAlignment() {
  Unset();
  delete[] h;
  delete[] s;
  delete[] l;
  delete[] m;
}

/////////////////////////////////////////////////////////////////////////////////////
// Free memory in HalfAlignment arrays s[][], l[][], and m[][]
void HalfAlignment::Unset() {
  // Free memory for alignment characters and residue counts
  for (int k = 0; k < n; k++) {
    delete[] s[k];
    delete[] l[k];
    delete[] m[k];
  }
  n = 0;
  sname = seq = NULL;
  nss_dssp = nss_pred = nss_conf = nsa_dssp = ncons = -1;
}

/////////////////////////////////////////////////////////////////////////////////////
// Prepare a2m/a3m alignment: Calculate l[k][i] (residue indices) and m[k][i] (position in seq[k])
void HalfAlignment::Set(char* name, char** seq_in, char** sname_in, int n_in,
                        int L_in, int n1, int n2, int n3, int n4, int nc) {
  int i;     //counts match states in seq[k]
  int ll;    //counts residues LEFT from or at current position in seq[k]
  int mm;    //counts postions in string seq[k]
  int k;     //counts sequences
  char c;
  char warned = 0;

  nss_dssp = n1;
  nss_pred = n2;
  nss_conf = n3;
  nsa_dssp = n4;
  ncons = nc;
  seq = seq_in;     //flat copy of sequences
  sname = sname_in;  //flat copy of sequence names
  n = n_in;
  L = L_in;
  pos = 0;

  // Allocate memory for alignment characters and residue counts
  for (k = 0; k < n; k++) {
    s[k] = new char[LINELEN];
    l[k] = new int[L + 10];
    m[k] = new int[L + 10];
    if (!s[k] || !l[k] || !m[k])
      MemoryError("space for formatting HMM-HMM alignment", __FILE__, __LINE__,
                  __func__);
    h[k] = 0;  //starting positions in alignment = 0
  }

  for (k = 0; k < n; k++) {
    m[k][0] = 0;  // 0'th match state (virtual) is begin state at 0
    //if k is consensus sequence
    if (k == nc) {
      for (i = 1; i <= L; i++)
        m[k][i] = l[k][i] = i;
      m[k][L + 1] = l[k][L + 1] = L;
      continue;
    }
    i = 1;
    mm = 1;
    ll = 1;
    while ((c = seq[k][mm])) {
      if (MatchChr(c) == c && i <= L)    //count match/delete states
      {
        l[k][i] = ll;
        m[k][i] = mm;
        i++;
      }
      if (WordChr(c))
        ll++;  //index of next residue
      mm++;
    }
    l[k][i] = ll - 1;  //set l[k][L+1] eq number of residues in seq k (-1 since there is no residue at L+1st match state)
    m[k][i] = mm;   //set m[k][L+1]
    if ((i - 1) != L && !warned) {
      HH_LOG(WARNING) << "Sequence " << sname[k] << " in HMM " << name
                << " has " << i << " match states but should have " << L
                << "\n";
      warned = 1;
    }
  }

  HH_LOG(DEBUG2) << "  i chr   m   l\n";
  for (i = 0; i <= L + 1; i++)
    HH_LOG(DEBUG2) << i << "   " << seq[0][m[0][i]] << " " << m[0][i]
                             << " " << l[0][i] << std::endl;
  HH_LOG(DEBUG2) << std::endl;
}

/////////////////////////////////////////////////////////////////////////////////////
// Fill in insert states following match state i (without inserting '.' to fill up)
void HalfAlignment::AddInserts(int i) {
  for (int k = 0; k < n; k++)                        // for all sequences...
    for (int mm = m[k][i] + 1; mm < m[k][i + 1]; mm++)  // for all inserts between match state i and i+1...
      s[k][h[k]++] = seq[k][mm];      // fill inserts into output alignment s[k]
}

/////////////////////////////////////////////////////////////////////////////////////
// Fill up alignment with gaps '.' to generate flush end (all h[k] equal)
void HalfAlignment::FillUpGaps() {
  int k;      //counts sequences
  pos = 0;

  // Determine max position h[k]
  for (k = 0; k < n; k++)
    pos = imax(h[k], pos);

  // Fill in gaps up to pos
  for (k = 0; k < n; k++) {
    for (int hh = h[k]; hh < pos; hh++)
      s[k][hh] = '.';
    h[k] = pos;
  }
}

/////////////////////////////////////////////////////////////////////////////////////
// Fill in insert states following match state i and fill up gaps with '.' 
void HalfAlignment::AddInsertsAndFillUpGaps(int i) {
  AddInserts(i);
  FillUpGaps();
}

/////////////////////////////////////////////////////////////////////////////////////
// Add gap column '.'
void HalfAlignment::AddChar(char c) {
  for (int k = 0; k < n; k++)
    s[k][h[k]++] = c;
  pos++;
}

/////////////////////////////////////////////////////////////////////////////////////
// Add match state column i as is
void HalfAlignment::AddColumn(int i) {
  for (int k = 0; k < n; k++)
    s[k][h[k]++] = seq[k][m[k][i]];
  pos++;
}

/////////////////////////////////////////////////////////////////////////////////////
// Add match state column i as insert state
void HalfAlignment::AddColumnAsInsert(int i) {
  char c;
  for (int k = 0; k < n; k++)
    if ((c = seq[k][m[k][i]]) != '-' && (c < '0' || c > '9'))
      s[k][h[k]++] = InsertChr(c);
  pos++;
}

/////////////////////////////////////////////////////////////////////////////////////
// Remove all characters c from template sequences
void HalfAlignment::RemoveChars(char c) {
  int k, h, hh;
  for (k = 0; k < n; k++) {
    for (h = hh = 0; h < pos; h++)
      if (s[k][h] != c)
        s[k][hh++] = s[k][h];
    s[k][++hh] = '\0';
  }
}

/////////////////////////////////////////////////////////////////////////////////////
// Transform alignment sequences from A3M to A2M (insert ".")
void HalfAlignment::BuildFASTA() {
  AddInserts(0);
  FillUpGaps();
  for (int i = 1; i <= L; i++) {
    AddColumn(i);
    AddInserts(i);
    FillUpGaps();
  }
  ToFASTA();
}

/////////////////////////////////////////////////////////////////////////////////////
// Transform alignment sequences from A3M to A2M (insert ".")
void HalfAlignment::BuildA2M() {
  AddInserts(0);
  FillUpGaps();
  for (int i = 1; i <= L; i++) {
    AddColumn(i);
    AddInserts(i);
    FillUpGaps();
  }
  AddChar('\0');
}

/////////////////////////////////////////////////////////////////////////////////////
// Transform alignment sequences from A3M to A2M (insert ".")
void HalfAlignment::BuildA3M() {
  AddInserts(0);
  for (int i = 1; i <= L; i++) {
    AddColumn(i);
    AddInserts(i);
  }
  AddChar('\0');
}

/////////////////////////////////////////////////////////////////////////////////////
// Transform alignment sequences from A2M to FASTA ( lowercase to uppercase and '.' to '-')
void HalfAlignment::ToFASTA() {
  for (int k = 0; k < n; k++) {
    uprstr(s[k]);
    strtr(s[k], ".", "-");
  }
}

/////////////////////////////////////////////////////////////////////////////////////
// Align query (HalfAlignment) to template (i.e. hit) match state structure
/////////////////////////////////////////////////////////////////////////////////////
void HalfAlignment::AlignToTemplate(Hit& hit, const char outformat) {
  int i, j;
  int step;    // column of the HMM-HMM alignment (first:nstep, last:1)
  char state;

  if (0) {  //par.loc==0) { //////////////////////////////////////////// STILL NEEDED??
    // If in global mode: Add part of alignment before first MM state
    AddInserts(0);  // Fill in insert states before first match state
    for (i = 1; i < hit.i[hit.nsteps]; i++) {
      AddColumnAsInsert(i);
      AddInserts(i);
      if (outformat <= 2)
        FillUpGaps();
    }
  }

  // Add endgaps (First state must be an MM state!!)
  for (j = 1; j < hit.j[hit.nsteps]; j++) {
    AddChar('-');
  }

  // Add alignment between first and last MM state
  for (step = hit.nsteps; step >= 1; step--) {
    state = hit.states[step];
    i = hit.i[step];

    switch (state) {
      case MM:  //MM pair state (both query and template in Match state)
        AddColumn(i);
        AddInserts(i);
        break;
      case DG:  //D- state
      case MI:  //MI state
        AddColumnAsInsert(i);
        AddInserts(i);
        break;
      case GD:  //-D state
      case IM:  //IM state
        AddChar('-');
        break;
    }
    if (outformat <= 2)
      FillUpGaps();

  }

  if (0) {  //par.loc==0) { //////////////////////////////////////////// STILL NEEDED??

    // If in global mode: Add part of alignment after last MM state
    for (i = hit.i[1] + 1; i <= L; i++) {
      AddColumnAsInsert(i);
      AddInserts(i);
      if (outformat == 2)
        FillUpGaps();
    }
  }

  // Add endgaps 
  for (j = hit.j[1] + 1; j <= hit.L; j++) {
    AddChar('-');
  }

  // Add end-of-string character
  AddChar('\0');
}

/////////////////////////////////////////////////////////////////////////////////////
// Write the a2m/a3m alignment into alnfile 
/////////////////////////////////////////////////////////////////////////////////////
void HalfAlignment::Print(char* alnfile, const char append, char* commentname,
                          const char format[]) {
  int k;      //counts sequences
  int omitted = 0;  // counts number of sequences with no residues in match states
  FILE *outf;
  char* tmp_name = new char[NAMELEN];
  if (strcmp(alnfile, "stdout")) {
    if (append)
      outf = fopen(alnfile, "a");
    else
      outf = fopen(alnfile, "w");
    if (!outf)
      OpenFileError(alnfile, __FILE__, __LINE__, __func__);
  } else
    outf = stdout;

  HH_LOG(DEBUG) << "Writing alignment to " << alnfile << "\n";

  if (!format || strcmp(format, "psi")) {
    if (commentname != NULL)
      fprintf(outf, "#%s\n", commentname);

    for (k = 0; k < n; k++)
      if (k == nss_pred || k == nss_conf || k == nss_dssp || k == nsa_dssp) {
        fprintf(outf, ">%s\n", sname[k]);
        fprintf(outf, "%s\n", s[k]);
      }
    for (k = 0; k < n; k++) {
      if (!(k == nss_pred || k == nss_conf || k == nss_dssp || k == nsa_dssp)) {
        // Print sequence only if it contains at least one residue in a match state
        if (1)  //strpbrk(s[k],"ABCDEFGHIKLMNPQRSTUVWXYZ1234567890"))
        {
          fprintf(outf, ">%s\n", sname[k]);
          fprintf(outf, "%s\n", s[k]);
        } else {
          omitted++;
          HH_LOG(DEBUG)
              << sname[k]
              << " contains no residue in match state. Omitting sequence\n";
        }
      }
    }
    if (omitted) {
      HH_LOG(INFO) << "Omitted " << omitted << " sequences in "
                             << alnfile
                             << " which contained no residue in match state\n";
    }
  } else {
    for (k = 0; k < n; k++) {
      strwrd(tmp_name, sname[k], NAMELEN);
      fprintf(outf, "%-20.20s ", tmp_name);
      char* ptr = s[k];
      for (; *ptr != '\0'; ptr++)
        if (*ptr == 45 || (*ptr >= 65 && *ptr <= 90))
          fprintf(outf, "%c", *ptr);
      fprintf(outf, "\n");
    }
  }
  fclose(outf);
  delete[] tmp_name;
}
